{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "#!wget -c ftp://ngs.sanger.ac.uk/production/ag1000g/phase1/AR3/variation/main/hdf5/ag1000g.phase1.ar3.pass.3L.h5\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "'0.19.2'"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from multiprocessing.pool import Pool\n",
    "from math import ceil\n",
    "\n",
    "import numpy as np\n",
    "\n",
    "import h5py\n",
    "\n",
    "import dask\n",
    "import dask.array as da\n",
    "import dask.multiprocessing\n",
    "dask.__version__"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "h5_3L = h5py.File('ag1000g.phase1.ar3.pass.3L.h5', 'r')\n",
    "samples = h5_3L['/3L/samples']\n",
    "positions = h5_3L['/3L/variants/POS']\n",
    "num_samples = len(samples)\n",
    "del samples"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0 9790\n",
      "1000000 11842226\n",
      "2000000 16486667\n",
      "3000000 19736472\n",
      "4000000 23745887\n",
      "5000000 27394730\n",
      "6000000 30373865\n",
      "7000000 33609094\n",
      "8000000 36894938\n",
      "9000000 40144443\n"
     ]
    }
   ],
   "source": [
    "window_size = 50000\n",
    "last_position = positions[-1]\n",
    "num_windows = ceil(last_position / window_size)\n",
    "limits = np.full((num_windows, 2), -1)\n",
    "curr_window = positions[0] // window_size\n",
    "limits[curr_window][0] = 0\n",
    "for index, position in enumerate(positions):\n",
    "    my_window = position // window_size\n",
    "    if index % 1000000 == 0:\n",
    "        print(index, position)\n",
    "    if my_window != curr_window:\n",
    "        limits[my_window, 0] = index\n",
    "        limits[curr_window, 1] = index - 1\n",
    "        curr_window = my_window\n",
    "limits[num_windows - 1, 1] = len(positions)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "limits = da.from_array(limits, chunks=(60, 2))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "dask.array<getitem, shape=(2,), dtype=int64, chunksize=(2,)> dask.array<getitem, shape=(2,), dtype=int64, chunksize=(2,)>\n"
     ]
    }
   ],
   "source": [
    "print(limits[0], limits[-1])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "ename": "NameError",
     "evalue": "name 'positions' is not defined",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m                                 Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-10-991a5e828b1b>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mprint\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpositions\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m-\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m//\u001b[0m \u001b[0mwindow_size\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mnum_windows\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;31mNameError\u001b[0m: name 'positions' is not defined"
     ]
    }
   ],
   "source": [
    "print(positions[-1] // window_size, num_windows)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "h5_3L.close()\n",
    "del h5_3L, positions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [],
   "source": [
    "def get_hdf5():\n",
    "    global calldata_genotype, alt_alleles, is_snp, num_samples\n",
    "    \n",
    "    try:\n",
    "        calldata_genotype\n",
    "    except NameError:\n",
    "        import os\n",
    "        print('Open', os.getpid())\n",
    "        h5_3L = h5py.File('ag1000g.phase1.ar3.pass.3L.h5', 'r')\n",
    "        samples = h5_3L['/3L/samples']\n",
    "        calldata_genotype = h5_3L['/3L/calldata/genotype']\n",
    "        alt_alleles = h5_3L['/3L/variants/ALT']\n",
    "        is_snp = h5_3L['/3L/variants/is_snp']\n",
    "    \n",
    "    return calldata_genotype, is_snp, alt_alleles, num_samples\n",
    "    \n",
    "@da.as_gufunc(signature=\"(i)->()\", output_dtypes=dict, vectorize=True)\n",
    "def calc_statistics(v):\n",
    "    calldata_genotype, is_snp, alt_alleles, num_samples = get_hdf5()\n",
    "    start, end = v[0], v[1]\n",
    "    min_maf = 0.5\n",
    "    max_maf = 0.0\n",
    "    non_bi = 0\n",
    "    missing = 0\n",
    "    non_snp = 0\n",
    "    print(v)\n",
    "    for pos in range(start, end + 1):\n",
    "        if not is_snp[pos]:\n",
    "            non_snp += 1\n",
    "            continue\n",
    "        if np.max(calldata_genotype[pos]) > 1:  \n",
    "               non_bi += 1\n",
    "               continue\n",
    "        if np.min(calldata_genotype[pos]) < 0:  \n",
    "               missing += 1\n",
    "               continue\n",
    "        num_alt = np.sum(calldata_genotype[pos])  # Because they are coded as 1\n",
    "        num_ref = num_samples * 2 - num_alt  # (because all are called)\n",
    "        min_called = min(num_ref, num_alt)\n",
    "        maf = min_called / (2 * num_samples)\n",
    "        if maf < min_maf:\n",
    "            min_maf = maf\n",
    "        if maf > max_maf:\n",
    "            max_maf = maf\n",
    "    return {'total': end - start + 1, 'missing': missing,\n",
    "            'non_snp': non_snp, 'non_bi': non_bi,\n",
    "            'min_maf': min_maf, 'max_maf': max_maf}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "scrolled": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Open 5777\n",
      "Open 5775\n",
      "Open 5776\n",
      "[131182 134910]\n",
      "[ 0 43]\n",
      "[345522 349568]\n",
      "Open 5778\n",
      "Open 5779\n",
      "Open 5782\n",
      "[635677 642989]\n",
      "Open 5780\n",
      "[1031921 1040914]\n",
      "[3296639 3308677]\n",
      "Open 5781\n",
      "[2446867 2465060]\n",
      "[1689745 1708060]\n",
      "[ 44 965]\n"
     ]
    },
    {
     "ename": "KeyboardInterrupt",
     "evalue": "",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m                         Traceback (most recent call last)",
      "\u001b[0;32m<ipython-input-15-df45fb5eddcf>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m      2\u001b[0m \u001b[0mstats\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m      3\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mdask\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconfig\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mscheduler\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'multiprocessing'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m     \u001b[0mstats\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcalc_statistics\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlimits\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcompute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
      "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/dask/base.py\u001b[0m in \u001b[0;36mcompute\u001b[0;34m(self, **kwargs)\u001b[0m\n\u001b[1;32m    154\u001b[0m         \u001b[0mdask\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mbase\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcompute\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    155\u001b[0m         \"\"\"\n\u001b[0;32m--> 156\u001b[0;31m         \u001b[0;34m(\u001b[0m\u001b[0mresult\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mcompute\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtraverse\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    157\u001b[0m         \u001b[0;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    158\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/dask/base.py\u001b[0m in \u001b[0;36mcompute\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    393\u001b[0m     \u001b[0mkeys\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__dask_keys__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mcollections\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    394\u001b[0m     \u001b[0mpostcomputes\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mx\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m__dask_postcompute__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mx\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mcollections\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 395\u001b[0;31m     \u001b[0mresults\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mschedule\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdsk\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mkeys\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    396\u001b[0m     \u001b[0;32mreturn\u001b[0m \u001b[0mrepack\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0ma\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mfor\u001b[0m \u001b[0mr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mf\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0ma\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32min\u001b[0m \u001b[0mzip\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mresults\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpostcomputes\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    397\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/dask/multiprocessing.py\u001b[0m in \u001b[0;36mget\u001b[0;34m(dsk, keys, num_workers, func_loads, func_dumps, optimize_graph, **kwargs)\u001b[0m\n\u001b[1;32m    170\u001b[0m                            \u001b[0mget_id\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0m_process_get_id\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdumps\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdumps\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mloads\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mloads\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    171\u001b[0m                            \u001b[0mpack_exception\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mpack_exception\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 172\u001b[0;31m                            raise_exception=reraise, **kwargs)\n\u001b[0m\u001b[1;32m    173\u001b[0m     \u001b[0;32mfinally\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    174\u001b[0m         \u001b[0;32mif\u001b[0m \u001b[0mcleanup\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/dask/local.py\u001b[0m in \u001b[0;36mget_async\u001b[0;34m(apply_async, num_workers, dsk, result, cache, get_id, rerun_exceptions_locally, pack_exception, raise_exception, callbacks, dumps, loads, **kwargs)\u001b[0m\n\u001b[1;32m    490\u001b[0m             \u001b[0;31m# Main loop, wait on tasks to finish, insert new ones\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    491\u001b[0m             \u001b[0;32mwhile\u001b[0m \u001b[0mstate\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'waiting'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mstate\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'ready'\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;32mor\u001b[0m \u001b[0mstate\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'running'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 492\u001b[0;31m                 \u001b[0mkey\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mres_info\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mfailed\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mqueue_get\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mqueue\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    493\u001b[0m                 \u001b[0;32mif\u001b[0m \u001b[0mfailed\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    494\u001b[0m                     \u001b[0mexc\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtb\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mloads\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mres_info\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/lib/python3.6/site-packages/dask/local.py\u001b[0m in \u001b[0;36mqueue_get\u001b[0;34m(q)\u001b[0m\n\u001b[1;32m    139\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    140\u001b[0m     \u001b[0;32mdef\u001b[0m \u001b[0mqueue_get\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mq\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 141\u001b[0;31m         \u001b[0;32mreturn\u001b[0m \u001b[0mq\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    142\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    143\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/lib/python3.6/queue.py\u001b[0m in \u001b[0;36mget\u001b[0;34m(self, block, timeout)\u001b[0m\n\u001b[1;32m    162\u001b[0m             \u001b[0;32melif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    163\u001b[0m                 \u001b[0;32mwhile\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_qsize\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 164\u001b[0;31m                     \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mnot_empty\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mwait\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    165\u001b[0m             \u001b[0;32melif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[0;34m<\u001b[0m \u001b[0;36m0\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    166\u001b[0m                 \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"'timeout' must be a non-negative number\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;32m~/anaconda3/lib/python3.6/threading.py\u001b[0m in \u001b[0;36mwait\u001b[0;34m(self, timeout)\u001b[0m\n\u001b[1;32m    293\u001b[0m         \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m    \u001b[0;31m# restore state no matter what (e.g., KeyboardInterrupt)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    294\u001b[0m             \u001b[0;32mif\u001b[0m \u001b[0mtimeout\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 295\u001b[0;31m                 \u001b[0mwaiter\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0macquire\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m    296\u001b[0m                 \u001b[0mgotit\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mTrue\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m    297\u001b[0m             \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
      "\u001b[0;31mKeyboardInterrupt\u001b[0m: "
     ]
    }
   ],
   "source": [
    "stats = None\n",
    "with dask.config.set(scheduler='multiprocessing'):\n",
    "    stats = calc_statistics(limits).compute()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "limits[0,0].compute()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "dask.array<getitem, shape=(2,), dtype=int64, chunksize=(2,)>"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "limits[0]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [],
   "source": [
    "#persist"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([{'total': 44, 'missing': 3, 'non_snp': 0, 'non_bi': 4, 'min_maf': 0.00065359477124183, 'max_maf': 0.0196078431372549},\n",
       "       {'total': 922, 'missing': 153, 'non_snp': 0, 'non_bi': 32, 'min_maf': 0.00065359477124183, 'max_maf': 0.4869281045751634},\n",
       "       {'total': 947, 'missing': 178, 'non_snp': 0, 'non_bi': 37, 'min_maf': 0.00065359477124183, 'max_maf': 0.4516339869281046},\n",
       "       {'total': 1508, 'missing': 77, 'non_snp': 0, 'non_bi': 49, 'min_maf': 0.00065359477124183, 'max_maf': 0.39477124183006534},\n",
       "       {'total': 16, 'missing': 0, 'non_snp': 0, 'non_bi': 2, 'min_maf': 0.00065359477124183, 'max_maf': 0.3235294117647059},\n",
       "       {'total': 367, 'missing': 8, 'non_snp': 0, 'non_bi': 10, 'min_maf': 0.0, 'max_maf': 0.4503267973856209},\n",
       "       {'total': 1235, 'missing': 8, 'non_snp': 0, 'non_bi': 54, 'min_maf': 0.0, 'max_maf': 0.3954248366013072},\n",
       "       {'total': 1570, 'missing': 32, 'non_snp': 0, 'non_bi': 68, 'min_maf': 0.00065359477124183, 'max_maf': 0.3934640522875817},\n",
       "       {'total': 193, 'missing': 0, 'non_snp': 0, 'non_bi': 4, 'min_maf': 0.00065359477124183, 'max_maf': 0.2549019607843137},\n",
       "       {'total': 255, 'missing': 39, 'non_snp': 0, 'non_bi': 16, 'min_maf': 0.00065359477124183, 'max_maf': 0.4117647058823529}],\n",
       "      dtype=object)"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "stats"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "#dask.config.set(scheduler='synchronous')\n",
    "#dask.config.set(scheduler='threads')\n"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
